********************************************************************************
******************** The Returns to Education: A Meta-study ********************
********************************************************************************

// Authors
* Gregory Clark
* Christian Alexander Abildgaard Nielsen

// Institution
* University of Southern Denmark

// Title
* The Returns to Education: A Meta-study



************************** Data download and reading ***************************

// Data available here: https://dataverse.harvard.edu/dataset.xhtml?persistentId=doi:10.7910/DVN/BUOUNW
// The data files are entitled "Returns_to_education.(xlsx/dta)"



// Importing Excel data
import excel "C:\Users\cniel\Dropbox\Forsknings opgaver\Litteratur\Effects of education studies\Kyklos submission\Revisions round 1\Returns_to_education.xlsx", sheet("Ark1") cellrange(A1:AM163) firstrow


// Destring all variables
destring _all, replace

// Set preferred scheme for figures
set scheme s1mono

// Estimates and standard errors in percentages
gen effect_percentage=effect*100
gen se_percentage = se*100
gen OLS_percentage = ols_effect*100

// Setting meta-data
gen es = effect_percentage
gen se1 = se_percentage
meta set es se1



**************************** Descriptive statistics ****************************

// Average estimate size/returns to schooling (in %)

// All estimates
sum effect_percentage, detail
// All independent estimates
sum effect_percentage if ind_est==1, detail
// All independent estimates, excluding working papers
sum effect_percentage if ind_est==1 & wp==0, detail


// Significance of estimates at 5%
codebook sig
sum sig, detail


// Other summary statistics (Appendix E)
sum effect_percentage se_percentage sig tstat fstat n full men women year_of_reform_numeric year_since_pub_2024 wp scholar_citations jour_imp_factor_2022 ols OLS_percentage iv_biggest poor_middle gdp_p_c MSLAbeforereform if ind_est==1


// Average returns to schooling weighted by precision
meta summarize if ind_est==1, random
meta summarize if ind_est==1, fixed



********************************* Heterogeneity ********************************

// Figure 5 - Heterogeneous across student age/minimum leaving age

// All independent estimates
reg effect_percentage MSLAbeforereform if ind_est==1, r
graph twoway (lfitci effect_percentage MSLAbeforereform) (scatter effect_percentage MSLAbeforereform) if ind_est==1, ytitle(Rate of Return (%)) xtitle (School Leaving Age) yline(0)

// without outlier (52% estimate)
reg effect_percentage MSLAbeforereform if ind_est==1 & effect_percentage~=51, r
graph twoway (lfitci effect_percentage MSLAbeforereform) (scatter effect_percentage MSLAbeforereform) if ind_est==1 & effect_percentage~=51, ytitle(Rate of Return (%)) xtitle (School Leaving Age) yline(0)
// We changed aesthetics using Graph Editor

// The same test, not assuming a linear relationship
reg effect_percentage i.MSLAbeforereform if ind_est==1 & effect_percentage~=51, r


// Rich/poor-middle income differences

// Average returns with sample split between rich and poor-middle income countries
mean effect_percentage if poor_middle==1 & ind_est==1
mean effect_percentage if poor_middle==0 & ind_est==1
// Standard errors
mean se_percentage if poor_middle==1 & ind_est==1
mean se_percentage if poor_middle==0 & ind_est==1

// Isolated effect of GDP per cap, when controlling for sample size
reg effect_percentage gdp_p_c se_percentage if ind_est==1, r
reg effect_percentage gdp_p_c se_percentage if ind_est==1 & se_percentage<10.1, r
reg effect_percentage poor_middle se_percentage if ind_est==1, r
reg effect_percentage poor_middle se_percentage if ind_est==1 & se_percentage<10.1, r


// IV vs. OLS returns

// Number and percentage of cases with biggest IV/OLS estimate
codebook iv_biggest if ols==1
sum iv_biggest if ols==1

// Average returns in the above IV & OLS estimates
mean effect_percentage if ols==1
mean OLS_percentage if ols==1

// Average of all IV estimates (also those without corresponding OLS estimate)
mean effect_percentage if ind_est==1 & method_estimation=="IV"
// Average of all non-IV estimates (DD and RDD)
mean effect_percentage if ind_est==1 & method_estimation=="RDD"|method_estimation=="DD" & ind_est==1



************************** Tests for publication bias **************************

// Rate of Return versus Standard Error

// Figure 6
sum effect_percentage if ind==1 & se_percentage<2.51
sum effect_percentage if ind==1 & se_percentage>2.51 & se_percentage<5.01
sum effect_percentage if ind==1 & se_percentage>5.01 & se_percentage~=.
// The graph itself constructed in Excel


// Figure 7

// Generating gender variable
gen gender = .
replace gender = 1 if full == 1
replace gender = 2 if men == 1
replace gender = 3 if women == 1
label define genderlbl 1 "Full" 2 "Male" 3 "Female"
label values gender genderlbl

// Making country a numerical variable
encode country, gen(country1)

// Underlying regressions (two-tailed and one-tailed tests)

// All independent estimates, uncontrolled
reg effect_percentage se_percentage if ind_est==1, r
test _b[se_percentage]=0
local sign_se = sign(_b[se_percentage])
display "Ho: coef <= 0  p-value = " ttail(r(df_r),`sign_se'*sqrt(r(F)))
// Without outliers (above 10% se)
reg effect_percentage se_percentage if ind_est==1 & se_percentage<10.1, r
test _b[se_percentage]=0
local sign_se = sign(_b[se_percentage])
display "Ho: coef <= 0  p-value = " ttail(r(df_r),`sign_se'*sqrt(r(F)))
// Without outliers (above 20% se)
reg effect_percentage se_percentage if ind_est==1 & se_percentage<20, r
test _b[se_percentage]=0
local sign_se = sign(_b[se_percentage])
display "Ho: coef <= 0  p-value = " ttail(r(df_r),`sign_se'*sqrt(r(F)))


// Controls (sample size, GDP, MSLA, gender, country1)
reg effect_percentage se_percentage gdp_p_c n MSLAbeforereform i.gender i.country1 if ind_est==1, r
test _b[se_percentage]=0
local sign_se = sign(_b[se_percentage])
display "Ho: coef <= 0  p-value = " ttail(r(df_r),`sign_se'*sqrt(r(F)))
// Only control for GDP
reg effect_percentage se_percentage gdp_p_c if ind_est==1, r
test _b[se_percentage]=0
local sign_se = sign(_b[se_percentage])
display "Ho: coef <= 0  p-value = " ttail(r(df_r),`sign_se'*sqrt(r(F)))
// Only control for sample size
reg effect_percentage se_percentage n if ind_est==1, r
test _b[se_percentage]=0
local sign_se = sign(_b[se_percentage])
display "Ho: coef <= 0  p-value = " ttail(r(df_r),`sign_se'*sqrt(r(F)))
// Only control for minimum school leavning age before reform
reg effect_percentage se_percentage MSLAbeforereform if ind_est==1, r
test _b[se_percentage]=0
local sign_se = sign(_b[se_percentage])
display "Ho: coef <= 0  p-value = " ttail(r(df_r),`sign_se'*sqrt(r(F)))
// Only control for gender
reg effect_percentage se_percentage i.gender if ind_est==1, r
test _b[se_percentage]=0
local sign_se = sign(_b[se_percentage])
display "Ho: coef <= 0  p-value = " ttail(r(df_r),`sign_se'*sqrt(r(F)))
// Only control for country
reg effect_percentage se_percentage i.country1 if ind_est==1, r
test _b[se_percentage]=0
local sign_se = sign(_b[se_percentage])
display "Ho: coef <= 0  p-value = " ttail(r(df_r),`sign_se'*sqrt(r(F)))


// Illustration of the relationship

// All independent estimates
graph twoway (lfitci   effect_percentage se_percentage) (scatter effect_percentage se_percentage) if ind_est==1, ytitle(Rate of Return (%)) xtitle (Standard Error (%)) yline(0)
// Without outliers (10% se)
graph twoway (lfitci   effect_percentage se_percentage) (scatter effect_percentage se_percentage) if ind_est==1 & se_percentage<10.1, ytitle(Rate of Return (%)) xtitle (Standard Error (%)) yline(0)
// Without outliers (20% se)
graph twoway (lfitci   effect_percentage se_percentage) (scatter effect_percentage se_percentage) if ind_est==1 & se<0.2, ytitle(Rate of Return (%)) xtitle (Standard Error (%)) yline(0)
// We changed aesthetics using Graph Editor


// PET-PEESE procedure

// Generating variable for PEESE
gen se_percentage2 = se_percentage^2

// PET & PEESE, uncontrolled, outliers excluded
reg effect_percentage se_percentage if ind_est==1 & se_percentage<10.1 [aw=1/(se^2)], r
reg effect_percentage se_percentage2 if ind_est==1 & se_percentage<10.1 [aw=1/(se^2)], r

// PET & PEESE, controlled, outliers excluded
reg effect_percentage se_percentage gdp_p_c n MSLAbeforereform i.gender i.country1 if ind_est==1 & se_percentage<10.1 [aw=1/(se^2)], r
reg effect_percentage se_percentage2 gdp_p_c n MSLAbeforereform i.gender i.country1 if ind_est==1 & se_percentage<10.1 [aw=1/(se^2)], r

// PET & PEESE, controlled, outliers included
reg effect_percentage se_percentage gdp_p_c n MSLAbeforereform i.gender i.country1 if ind_est==1 [aw=1/(se^2)], r
reg effect_percentage se_percentage2 gdp_p_c n MSLAbeforereform i.gender i.country1 if ind_est==1 [aw=1/(se^2)], r


// Relative Frequency of Estimated Rates of Return fitted to Normal and log-normal Distribution
// Figures 8, 9 and 10 made in Excel (see Excel file in online appendix titled: "Data for Figures 8-10")



******************************* Robustness checks ******************************

// Tests excluding IVs with weak f-statistics (replication of regressions illustrated in figure 7)

// Main regression (figure 7)
reg effect_percentage se_percentage if ind_est==1 & se_percentage<10.1, r
test _b[se_percentage]=0
local sign_se = sign(_b[se_percentage])
display "Ho: coef <= 0  p-value = " ttail(r(df_r),`sign_se'*sqrt(r(F)))

// Main regression with controls
reg effect_percentage se_percentage gdp_p_c n MSLAbeforereform i.gender i.country1 if ind_est==1, r
test _b[se_percentage]=0
local sign_se = sign(_b[se_percentage])
display "Ho: coef <= 0  p-value = " ttail(r(df_r),`sign_se'*sqrt(r(F)))

// Excluding all IVs
reg effect_percentage se_percentage if ind_est==1 & se_percentage<10.1 & method_estimation~="IV", r
test _b[se_percentage]=0
local sign_se = sign(_b[se_percentage])
display "Ho: coef <= 0  p-value = " ttail(r(df_r),`sign_se'*sqrt(r(F)))

// Excluding IVs with f-statistics below conventional of 10
reg effect_percentage se_percentage if ind_est==1 & se_percentage<10.1 & fstat>10, r
test _b[se_percentage]=0
local sign_se = sign(_b[se_percentage])
display "Ho: coef <= 0  p-value = " ttail(r(df_r),`sign_se'*sqrt(r(F)))

// Excluding IVs with f-statistics below 20 (where positive bias is very likely according to Keane & Neal 2023)
reg effect_percentage se_percentage if ind_est==1 & se_percentage<10.1 & fstat>20, r
test _b[se_percentage]=0
local sign_se = sign(_b[se_percentage])
display "Ho: coef <= 0  p-value = " ttail(r(df_r),`sign_se'*sqrt(r(F)))

// Excluding IVs with f-statistics below 104.7 (based on Lee et al. 2022 arguments)
reg effect_percentage se_percentage if ind_est==1 & se_percentage<10.1 & fstat>104.7, r
test _b[se_percentage]=0
local sign_se = sign(_b[se_percentage])
display "Ho: coef <= 0  p-value = " ttail(r(df_r),`sign_se'*sqrt(r(F)))

// Excluding IVs with f-statistics below 20 (where positive bias is very likely according to Keane & Neal 2023) with controls
reg effect_percentage se_percentage gdp_p_c n MSLAbeforereform i.gender i.country1 if ind_est==1 & se_percentage<10.1 & fstat>20, r
test _b[se_percentage]=0
local sign_se = sign(_b[se_percentage])
display "Ho: coef <= 0  p-value = " ttail(r(df_r),`sign_se'*sqrt(r(F)))

// Excluding IVs with f-statistics below 104.7 (based on Lee et al. 2022 arguments) with controls
reg effect_percentage se_percentage gdp_p_c n MSLAbeforereform i.gender i.country1 if ind_est==1 & se_percentage<10.1 & fstat>104.7, r
test _b[se_percentage]=0
local sign_se = sign(_b[se_percentage])
display "Ho: coef <= 0  p-value = " ttail(r(df_r),`sign_se'*sqrt(r(F)))

// Excluding IVs with f-statistics below 104.7 (based on Lee et al. 2022 arguments) with outliers
reg effect_percentage se_percentage if ind_est==1 & fstat>104.7, r
test _b[se_percentage]=0
local sign_se = sign(_b[se_percentage])
display "Ho: coef <= 0  p-value = " ttail(r(df_r),`sign_se'*sqrt(r(F)))


// Distribution of t-statistics around significance levels

// 1.96, 0.20 bandwidth
sum tstat if tstat<=2.16 & tstat>=1.96 // above 1.96
sum tstat if tstat<=1.96 & tstat>=1.76 // below 1.96
// 2.58, 0.20 bandwidth
sum tstat if tstat<=2.78 & tstat>=2.58 // above 2.58
sum tstat if tstat<=2.58 & tstat>=2.38 // below 2.58


// 5% bins used in distributions here due to decreasing size of the utilized samples (they show actual returns in bins, normal distribution as solid line)

// Comparison of old and new papers

// Average returns (last 10 years (2015-2024) or earlier)
mean effect_percentage if year_since_pub_2024<=9 & ind_est==1
mean effect_percentage if year_since_pub_2024>9 & ind_est==1

// Average standard error (last 10 years (2015-2024) or earlier)
mean se_percentage if year_since_pub_2024<=9 & ind_est==1
mean se_percentage if year_since_pub_2024>9 & ind_est==1

// Inspecting number of negative estimates (last 10 years (2015-2024) or earlier)
codebook effect_percentage if year_since_pub_2024<=9 & ind_est==1, tab(50)
codebook effect_percentage if year_since_pub_2024>9 & ind_est==1, tab(50)


// Rate of Return versus Standard Error for old and new papers
// New papers
reg effect_percentage se_percentage if ind_est==1 & year_since_pub_2024<=9, r
test _b[se_percentage]=0
local sign_se = sign(_b[se_percentage])
display "Ho: coef <= 0  p-value = " ttail(r(df_r),`sign_se'*sqrt(r(F)))
graph twoway (lfitci   effect_percentage se) (scatter effect_percentage se) if ind_est==1 & year_since_pub_2024<=9, ytitle(Rate of Return (%)) xtitle (Standard Error (%))

// Distribution of estimates for new papers (normal distribution set at mean here)
hist effect_percentage if ind_est==1 & year_since_pub_2024<=9 & effect_percentage<50, frequency normal width (5) start (-35)

// Main regression with controls, also including publication year
reg effect_percentage se_percentage gdp_p_c n MSLAbeforereform i.gender i.country1 year_since_pub_2024 if ind_est==1, r
test _b[se_percentage]=0
local sign_se = sign(_b[se_percentage])
display "Ho: coef <= 0  p-value = " ttail(r(df_r),`sign_se'*sqrt(r(F)))


// Estimate sizes by publication year

// Generating publication year variable
gen pub_year = 2024-year_since_pub_2024

// All independent estimates
graph twoway (lfitci effect_percentage pub_year) (scatter effect_percentage pub_year) if ind_est==1, ytitle(Rate of Return (%)) xtitle (Publication Year) yline(0)

// without outlier (52% estimate)
graph twoway (lfitci effect_percentage pub_year) (scatter effect_percentage pub_year) if ind_est==1 & effect_percentage~=51, ytitle(Rate of Return (%)) xtitle (Publication Year) yline(0)


// Replication with other old/new thresholds
// 2017-2024
reg effect_percentage se_percentage if ind_est==1 & year_since_pub_2024<=7, r
test _b[se_percentage]=0
local sign_se = sign(_b[se_percentage])
display "Ho: coef <= 0  p-value = " ttail(r(df_r),`sign_se'*sqrt(r(F)))
graph twoway (lfitci   effect_percentage se) (scatter effect_percentage se) if ind_est==1 & year_since_pub_2024<=7, ytitle(Rate of Return (%)) xtitle (Standard Error (%))
hist effect_percentage if ind_est==1 & year_since_pub_2024<=7 & effect_percentage<50, frequency normal width (5) start (-35)
// 2019-2024
reg effect_percentage se_percentage if ind_est==1 & year_since_pub_2024<=5, r
test _b[se_percentage]=0
local sign_se = sign(_b[se_percentage])
display "Ho: coef <= 0  p-value = " ttail(r(df_r),`sign_se'*sqrt(r(F)))
graph twoway (lfitci   effect_percentage se) (scatter effect_percentage se) if ind_est==1 & year_since_pub_2024<=5, ytitle(Rate of Return (%)) xtitle (Standard Error (%))
hist effect_percentage if ind_est==1 & year_since_pub_2024<=5 & effect_percentage<50, frequency normal width (5) start (-35)
// 2021-2024
reg effect_percentage se_percentage if ind_est==1 & year_since_pub_2024<=3, r
test _b[se_percentage]=0
local sign_se = sign(_b[se_percentage])
display "Ho: coef <= 0  p-value = " ttail(r(df_r),`sign_se'*sqrt(r(F)))
graph twoway (lfitci   effect_percentage se) (scatter effect_percentage se) if ind_est==1 & year_since_pub_2024<=3, ytitle(Rate of Return (%)) xtitle (Standard Error (%))
hist effect_percentage if ind_est==1 & year_since_pub_2024<=3 & effect_percentage<50, frequency normal width (5) start (-35)



// Tests excluding poor countries

// Rate of Return vs. Standard Error
reg effect_percentage se_percentage if ind_est==1 & poor_middle==0, r
test _b[se_percentage]=0
local sign_se = sign(_b[se_percentage])
display "Ho: coef <= 0  p-value = " ttail(r(df_r),`sign_se'*sqrt(r(F)))
reg effect_percentage se_percentage if ind_est==1 & se_percentage<10.1 & poor_middle==0, r
test _b[se_percentage]=0
local sign_se = sign(_b[se_percentage])
display "Ho: coef <= 0  p-value = " ttail(r(df_r),`sign_se'*sqrt(r(F)))

// Distribution of estimates
hist effect_percentage if ind_est==1 & poor_middle==0 & effect_percentage<50, frequency normal width (5) start (-35)



********************************** Extra tests *********************************

// Citation test

// Generating logaritmic citations variable (to lessen influence of outliers)
gen log_citations = log(scholar_citations)
// OLS-regression, logarithm of citations and effect sizes (controls: Year of publication, pioneer dummy)
reg log_citations effect_percentage year_since_pub_2024 pioneer, r
// OLS-regression, logarithm of citations and standard error (controls: Year of publication, pioneer dummy)
reg log_citations se_percentage year_since_pub_2024 pioneer, r


// Gender tests

// Average returns for men and women
sum effect_percentage if ind_est==1 & women==1, detail
sum effect_percentage if ind_est==1 & men==1, detail

// Generating gender variable
gen gender1=.
recode gender1 (.=0) if men==1
recode gender1 (.=1) if women==1

// Regressing gender on returns with inclusion of controls
reg effect_percentage gender1, r
// Inclusion of controls
reg effect_percentage gender1 gdp_p_c, r
reg effect_percentage gender1 gdp_p_c n MSLAbeforereform, r



*********************************** Appendix ***********************************

// Appendix F (figure 11)

// Rate of Return versus Standard Error as funnel plot

meta funnelplot if ind_est==1
meta funnelplot if se1<10.1 & ind_est==1

meta bias if ind_est==1, egger
meta bias gdp_p_c if ind_est==1, egger
meta bias gdp_p_c MSLAbeforereform if ind_est==1, egger
meta bias gdp_p_c MSLAbeforereform n if ind_est==1, egger
// We changed aesthetics using Graph Editor

// With split by old and new papers
gen paper_group = year_since_pub_2024 <= 9
label define group 0 "Old papers (1991-2014)" 1 "New papers (2015-2024)"
label values paper_group group
meta funnelplot if se1<10.1 & ind_est==1, by(paper_group)

// Color coded with full sample (red = old, blue = new)
gen byte newpaper = year_since_pub_2024 <= 9 if !missing(year_since_pub_2024)
label define newold 0 "Old papers" 1 "New papers"
label values newpaper newold

meta funnelplot if ind_est==1 & se1 < 10.1, msymbol(i) ///
    addplot( ///
        (scatter se1 es if ind_est==1 & se1 < 10.1 & newpaper==1, ///
            msymbol(diamond)  mcolor(blue) msize(small)) ///
        (scatter se1 es if ind_est==1 & se1 < 10.1 & newpaper==0, ///
            msymbol(diamond) mcolor(red)  msize(small)) ///
    ) ///
    legend( order(2 "New papers (2015-2024)" 3 "Old papers (1991-2014)") ) ///
    xtitle("Effect size (%)") ///
    ytitle("Standard error (%)")



// Footnote 12 - Robustness check with removal of working papers & papers from "less prestigious journals" proxied by Journal Impact factor (JIF) in 2022

// Excluding working papers
// Average return
sum effect_percentage if ind_est==1 & wp==0, detail
// Rate of Return versus Standard Error
reg effect_percentage se_percentage if ind_est==1 & wp==0, 
test _b[se_percentage]=0
local sign_se = sign(_b[se_percentage])
display "Ho: coef <= 0  p-value = " ttail(r(df_r),`sign_se'*sqrt(r(F)))
graph twoway (lfitci   effect_percentage se_percentage) (scatter effect_percentage se_percentage) if ind_est==1 & wp==0, ytitle(Rate of Return (%)) xtitle (Standard Error (%))
// Distribution of estimates (normal distribution around mean in this graph)
hist effect_percentage if ind_est==1 & wp==0, frequency normal width (5) start (-35)

// Excluding working papers & papers with journal impact factor below certain levels (normal distribution at the mean in histograms)
// JIF>0.9
sum effect_percentage if ind_est==1 & wp==0 & jour_imp_factor_2022>0.9, detail
reg effect_percentage se_percentage if ind_est==1 & wp==0 & jour_imp_factor_2022>0.9, r
test _b[se_percentage]=0
local sign_se = sign(_b[se_percentage])
display "Ho: coef <= 0  p-value = " ttail(r(df_r),`sign_se'*sqrt(r(F)))
graph twoway (lfitci   effect_percentage se_percentage) (scatter effect_percentage se_percentage) if ind_est==1 & wp==0 & jour_imp_factor_2022>0.9, ytitle(Rate of Return (%)) xtitle (Standard Error (%))
hist effect_percentage if ind_est==1 & wp==0 & jour_imp_factor_2022>0.9, frequency normal width (5) start (-35)
// JIF>1.1
sum effect_percentage if ind_est==1 & wp==0 & jour_imp_factor_2022>1.1, detail
reg effect_percentage se_percentage if ind_est==1 & wp==0 & jour_imp_factor_2022>1.1, r
test _b[se_percentage]=0
local sign_se = sign(_b[se_percentage])
display "Ho: coef <= 0  p-value = " ttail(r(df_r),`sign_se'*sqrt(r(F)))
graph twoway (lfitci   effect_percentage se_percentage) (scatter effect_percentage se_percentage) if ind_est==1 & wp==0 & jour_imp_factor_2022>1.1, ytitle(Rate of Return (%)) xtitle (Standard Error (%))
hist effect_percentage if ind_est==1 & wp==0 & jour_imp_factor_2022>1.1, frequency normal width (5) start (-35)
// JIF>1.3
sum effect_percentage if ind_est==1 & wp==0 & jour_imp_factor_2022>1.3, detail
reg effect_percentage se_percentage if ind_est==1 & wp==0 & jour_imp_factor_2022>1.3, r
test _b[se_percentage]=0
local sign_se = sign(_b[se_percentage])
display "Ho: coef <= 0  p-value = " ttail(r(df_r),`sign_se'*sqrt(r(F)))
graph twoway (lfitci   effect_percentage se_percentage) (scatter effect_percentage se_percentage) if ind_est==1 & wp==0 & jour_imp_factor_2022>1.3, ytitle(Rate of Return (%)) xtitle (Standard Error (%))
hist effect_percentage if ind_est==1 & wp==0 & jour_imp_factor_2022>1.3, frequency normal width (5) start (-35)
// JIF>1.6
sum effect_percentage if ind_est==1 & wp==0 & jour_imp_factor_2022>1.6, detail
reg effect_percentage se_percentage if ind_est==1 & wp==0 & jour_imp_factor_2022>1.6, r
test _b[se_percentage]=0
local sign_se = sign(_b[se_percentage])
display "Ho: coef <= 0  p-value = " ttail(r(df_r),`sign_se'*sqrt(r(F)))
graph twoway (lfitci   effect_percentage se_percentage) (scatter effect_percentage se_percentage) if ind_est==1 & wp==0 & jour_imp_factor_2022>1.6, ytitle(Rate of Return (%)) xtitle (Standard Error (%))
hist effect_percentage if ind_est==1 & wp==0 & jour_imp_factor_2022>1.6, frequency normal width (5) start (-35)
// JIF>1.8
sum effect_percentage if ind_est==1 & wp==0 & jour_imp_factor_2022>1.8, detail
reg effect_percentage se_percentage if ind_est==1 & wp==0 & jour_imp_factor_2022>1.8, r
test _b[se_percentage]=0
local sign_se = sign(_b[se_percentage])
display "Ho: coef <= 0  p-value = " ttail(r(df_r),`sign_se'*sqrt(r(F)))
graph twoway (lfitci   effect_percentage se_percentage) (scatter effect_percentage se_percentage) if ind_est==1 & wp==0 & jour_imp_factor_2022>1.8, ytitle(Rate of Return (%)) xtitle (Standard Error (%))
hist effect_percentage if ind_est==1 & wp==0 & jour_imp_factor_2022>1.8, frequency normal width (5) start (-35)
// JIF>2.1
sum effect_percentage if ind_est==1 & wp==0 & jour_imp_factor_2022>2.1, detail
reg effect_percentage se_percentage if ind_est==1 & wp==0 & jour_imp_factor_2022>2.1, r
test _b[se_percentage]=0
local sign_se = sign(_b[se_percentage])
display "Ho: coef <= 0  p-value = " ttail(r(df_r),`sign_se'*sqrt(r(F)))
graph twoway (lfitci   effect_percentage se_percentage) (scatter effect_percentage se_percentage) if ind_est==1 & wp==0 & jour_imp_factor_2022>2.1, ytitle(Rate of Return (%)) xtitle (Standard Error (%))
hist effect_percentage if ind_est==1 & wp==0 & jour_imp_factor_2022>2.1, frequency normal width (5) start (-35)


// Including only estimates from most "prestigious" journals (estimates from the journals with highest impact factor)
// 3.2 and higher
sum effect_percentage if ind_est==1 & wp==0 & jour_imp_factor_2022>3.1, detail
reg effect_percentage se_percentage if ind_est==1 & wp==0 & jour_imp_factor_2022>3.1, r
test _b[se_percentage]=0
local sign_se = sign(_b[se_percentage])
display "Ho: coef <= 0  p-value = " ttail(r(df_r),`sign_se'*sqrt(r(F)))
graph twoway (lfitci   effect_percentage se_percentage) (scatter effect_percentage se_percentage) if ind_est==1 & wp==0 & jour_imp_factor_2022>3.1, ytitle(Rate of Return (%)) xtitle (Standard Error (%))
hist effect_percentage if ind_est==1 & wp==0 & jour_imp_factor_2022>3.1, frequency normal width (5) start (-35)
// 3.6 and higher
sum effect_percentage if ind_est==1 & wp==0 & jour_imp_factor_2022>3.5, detail
reg effect_percentage se_percentage if ind_est==1 & wp==0 & jour_imp_factor_2022>3.5, r
test _b[se_percentage]=0
local sign_se = sign(_b[se_percentage])
display "Ho: coef <= 0  p-value = " ttail(r(df_r),`sign_se'*sqrt(r(F)))
graph twoway (lfitci   effect_percentage se_percentage) (scatter effect_percentage se_percentage) if ind_est==1 & wp==0 & jour_imp_factor_2022>3.5, ytitle(Rate of Return (%)) xtitle (Standard Error (%))
hist effect_percentage if ind_est==1 & wp==0 & jour_imp_factor_2022>3.5, frequency normal width (5) start (-35)
// 3.8 and higher
sum effect_percentage if ind_est==1 & wp==0 & jour_imp_factor_2022>3.7, detail
reg effect_percentage se_percentage if ind_est==1 & wp==0 & jour_imp_factor_2022>3.7, r
test _b[se_percentage]=0
local sign_se = sign(_b[se_percentage])
display "Ho: coef <= 0  p-value = " ttail(r(df_r),`sign_se'*sqrt(r(F)))
graph twoway (lfitci   effect_percentage se_percentage) (scatter effect_percentage se_percentage) if ind_est==1 & wp==0 & jour_imp_factor_2022>3.7, ytitle(Rate of Return (%)) xtitle (Standard Error (%))
hist effect_percentage if ind_est==1 & wp==0 & jour_imp_factor_2022>3.7, frequency normal width (5) start (-35)



********************************************************************************